{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "afinn_score\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>\\makecell{Toxicity\\\\score}</th>\n",
       "      <th>\\makecell{Afinn\\\\score}</th>\n",
       "      <th>\\makecell{Political\\\\vector}</th>\n",
       "      <th>\\makecell{Hate\\\\vector}</th>\n",
       "      <th>\\makecell{Political\\\\interest}</th>\n",
       "      <th>\\makecell{Political\\\\knowledge}</th>\n",
       "      <th>\\makecell{Hostility}</th>\n",
       "      <th>\\makecell{Female}</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Political hate expert</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.35}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.28}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.67}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.45}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.43}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.37}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.28}\\\\ }</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.11}\\\\ }</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political hate</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.47}\\\\0.67}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.32}\\\\-0.61}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.91}\\\\0.94}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.87}\\\\0.9}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.33}\\\\0.41}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.35}\\\\0.44}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.24}\\\\0.3}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.07}\\\\-0.13}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political hate classifier</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.34}\\\\0.57}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.25}\\\\-0.55}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.66}\\\\0.81}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.43}\\\\0.69}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.42}\\\\0.42}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.36}\\\\0.42}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.27}\\\\0.28}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.12}\\\\-0.1}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political hate context</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.39}\\\\0.59}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.3}\\\\-0.62}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.63}\\\\0.84}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.47}\\\\0.65}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.32}\\\\0.43}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.33}\\\\0.44}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.24}\\\\0.27}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.13}\\\\-0.17}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Toxicity score</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.24}\\\\-0.68}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.4}\\\\0.58}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.45}\\\\0.67}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.14}\\\\0.24}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.17}\\\\0.22}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.15}\\\\0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.0}\\\\-0.05}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Afinn score</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.24}\\\\-0.68}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.3}\\\\-0.59}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.27}\\\\-0.53}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.14}\\\\-0.28}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.16}\\\\-0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.12}\\\\-0.27}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.07}\\\\0.11}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political vector</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.4}\\\\0.58}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.3}\\\\-0.59}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.6}\\\\0.7}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.36}\\\\0.45}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.38}\\\\0.48}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.22}\\\\0.27}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.12}\\\\-0.21}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hate vector</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.45}\\\\0.67}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.27}\\\\-0.53}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.6}\\\\0.7}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.21}\\\\0.28}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.23}\\\\0.31}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.22}\\\\0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.0}\\\\-0.01}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political interest</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.14}\\\\0.24}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.14}\\\\-0.28}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.36}\\\\0.45}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.21}\\\\0.28}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.52}\\\\0.48}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.24}\\\\0.17}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.15}\\\\-0.18}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political knowledge</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.17}\\\\0.22}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.16}\\\\-0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.38}\\\\0.48}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.23}\\\\0.31}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.52}\\\\0.48}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.23}\\\\0.17}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.27}\\\\-0.3}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hostility</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.15}\\\\0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.12}\\\\-0.27}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.22}\\\\0.27}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.22}\\\\0.29}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.24}\\\\0.17}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.23}\\\\0.17}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.13}\\\\-0.12}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Female</th>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.0}\\\\-0.05}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{0.07}\\\\0.11}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.12}\\\\-0.21}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.0}\\\\-0.01}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.15}\\\\-0.18}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.27}\\\\-0.3}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{-0.13}\\\\-0.12}</td>\n",
       "      <td>\\makecell{\\textcolor{gray}{1.0}\\\\1.0}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          \\makecell{Toxicity\\\\score}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.35}\\\\ }   \n",
       "Political hate               \\makecell{\\textcolor{gray}{0.47}\\\\0.67}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.34}\\\\0.57}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.39}\\\\0.59}   \n",
       "Toxicity score                 \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Afinn score                \\makecell{\\textcolor{gray}{-0.24}\\\\-0.68}   \n",
       "Political vector              \\makecell{\\textcolor{gray}{0.4}\\\\0.58}   \n",
       "Hate vector                  \\makecell{\\textcolor{gray}{0.45}\\\\0.67}   \n",
       "Political interest           \\makecell{\\textcolor{gray}{0.14}\\\\0.24}   \n",
       "Political knowledge          \\makecell{\\textcolor{gray}{0.17}\\\\0.22}   \n",
       "Hostility                    \\makecell{\\textcolor{gray}{0.15}\\\\0.29}   \n",
       "Female                       \\makecell{\\textcolor{gray}{0.0}\\\\-0.05}   \n",
       "\n",
       "                                             \\makecell{Afinn\\\\score}  \\\n",
       "Political hate expert          \\makecell{\\textcolor{gray}{-0.28}\\\\ }   \n",
       "Political hate             \\makecell{\\textcolor{gray}{-0.32}\\\\-0.61}   \n",
       "Political hate classifier  \\makecell{\\textcolor{gray}{-0.25}\\\\-0.55}   \n",
       "Political hate context      \\makecell{\\textcolor{gray}{-0.3}\\\\-0.62}   \n",
       "Toxicity score             \\makecell{\\textcolor{gray}{-0.24}\\\\-0.68}   \n",
       "Afinn score                    \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Political vector            \\makecell{\\textcolor{gray}{-0.3}\\\\-0.59}   \n",
       "Hate vector                \\makecell{\\textcolor{gray}{-0.27}\\\\-0.53}   \n",
       "Political interest         \\makecell{\\textcolor{gray}{-0.14}\\\\-0.28}   \n",
       "Political knowledge        \\makecell{\\textcolor{gray}{-0.16}\\\\-0.29}   \n",
       "Hostility                  \\makecell{\\textcolor{gray}{-0.12}\\\\-0.27}   \n",
       "Female                       \\makecell{\\textcolor{gray}{0.07}\\\\0.11}   \n",
       "\n",
       "                                        \\makecell{Political\\\\vector}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.67}\\\\ }   \n",
       "Political hate               \\makecell{\\textcolor{gray}{0.91}\\\\0.94}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.66}\\\\0.81}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.63}\\\\0.84}   \n",
       "Toxicity score                \\makecell{\\textcolor{gray}{0.4}\\\\0.58}   \n",
       "Afinn score                 \\makecell{\\textcolor{gray}{-0.3}\\\\-0.59}   \n",
       "Political vector               \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Hate vector                    \\makecell{\\textcolor{gray}{0.6}\\\\0.7}   \n",
       "Political interest           \\makecell{\\textcolor{gray}{0.36}\\\\0.45}   \n",
       "Political knowledge          \\makecell{\\textcolor{gray}{0.38}\\\\0.48}   \n",
       "Hostility                    \\makecell{\\textcolor{gray}{0.22}\\\\0.27}   \n",
       "Female                     \\makecell{\\textcolor{gray}{-0.12}\\\\-0.21}   \n",
       "\n",
       "                                             \\makecell{Hate\\\\vector}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.45}\\\\ }   \n",
       "Political hate                \\makecell{\\textcolor{gray}{0.87}\\\\0.9}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.43}\\\\0.69}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.47}\\\\0.65}   \n",
       "Toxicity score               \\makecell{\\textcolor{gray}{0.45}\\\\0.67}   \n",
       "Afinn score                \\makecell{\\textcolor{gray}{-0.27}\\\\-0.53}   \n",
       "Political vector               \\makecell{\\textcolor{gray}{0.6}\\\\0.7}   \n",
       "Hate vector                    \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Political interest           \\makecell{\\textcolor{gray}{0.21}\\\\0.28}   \n",
       "Political knowledge          \\makecell{\\textcolor{gray}{0.23}\\\\0.31}   \n",
       "Hostility                    \\makecell{\\textcolor{gray}{0.22}\\\\0.29}   \n",
       "Female                      \\makecell{\\textcolor{gray}{-0.0}\\\\-0.01}   \n",
       "\n",
       "                                      \\makecell{Political\\\\interest}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.43}\\\\ }   \n",
       "Political hate               \\makecell{\\textcolor{gray}{0.33}\\\\0.41}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.42}\\\\0.42}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.32}\\\\0.43}   \n",
       "Toxicity score               \\makecell{\\textcolor{gray}{0.14}\\\\0.24}   \n",
       "Afinn score                \\makecell{\\textcolor{gray}{-0.14}\\\\-0.28}   \n",
       "Political vector             \\makecell{\\textcolor{gray}{0.36}\\\\0.45}   \n",
       "Hate vector                  \\makecell{\\textcolor{gray}{0.21}\\\\0.28}   \n",
       "Political interest             \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Political knowledge          \\makecell{\\textcolor{gray}{0.52}\\\\0.48}   \n",
       "Hostility                    \\makecell{\\textcolor{gray}{0.24}\\\\0.17}   \n",
       "Female                     \\makecell{\\textcolor{gray}{-0.15}\\\\-0.18}   \n",
       "\n",
       "                                     \\makecell{Political\\\\knowledge}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.37}\\\\ }   \n",
       "Political hate               \\makecell{\\textcolor{gray}{0.35}\\\\0.44}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.36}\\\\0.42}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.33}\\\\0.44}   \n",
       "Toxicity score               \\makecell{\\textcolor{gray}{0.17}\\\\0.22}   \n",
       "Afinn score                \\makecell{\\textcolor{gray}{-0.16}\\\\-0.29}   \n",
       "Political vector             \\makecell{\\textcolor{gray}{0.38}\\\\0.48}   \n",
       "Hate vector                  \\makecell{\\textcolor{gray}{0.23}\\\\0.31}   \n",
       "Political interest           \\makecell{\\textcolor{gray}{0.52}\\\\0.48}   \n",
       "Political knowledge            \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Hostility                    \\makecell{\\textcolor{gray}{0.23}\\\\0.17}   \n",
       "Female                      \\makecell{\\textcolor{gray}{-0.27}\\\\-0.3}   \n",
       "\n",
       "                                                \\makecell{Hostility}  \\\n",
       "Political hate expert           \\makecell{\\textcolor{gray}{0.28}\\\\ }   \n",
       "Political hate                \\makecell{\\textcolor{gray}{0.24}\\\\0.3}   \n",
       "Political hate classifier    \\makecell{\\textcolor{gray}{0.27}\\\\0.28}   \n",
       "Political hate context       \\makecell{\\textcolor{gray}{0.24}\\\\0.27}   \n",
       "Toxicity score               \\makecell{\\textcolor{gray}{0.15}\\\\0.29}   \n",
       "Afinn score                \\makecell{\\textcolor{gray}{-0.12}\\\\-0.27}   \n",
       "Political vector             \\makecell{\\textcolor{gray}{0.22}\\\\0.27}   \n",
       "Hate vector                  \\makecell{\\textcolor{gray}{0.22}\\\\0.29}   \n",
       "Political interest           \\makecell{\\textcolor{gray}{0.24}\\\\0.17}   \n",
       "Political knowledge          \\makecell{\\textcolor{gray}{0.23}\\\\0.17}   \n",
       "Hostility                      \\makecell{\\textcolor{gray}{1.0}\\\\1.0}   \n",
       "Female                     \\makecell{\\textcolor{gray}{-0.13}\\\\-0.12}   \n",
       "\n",
       "                                                   \\makecell{Female}  \n",
       "Political hate expert          \\makecell{\\textcolor{gray}{-0.11}\\\\ }  \n",
       "Political hate             \\makecell{\\textcolor{gray}{-0.07}\\\\-0.13}  \n",
       "Political hate classifier   \\makecell{\\textcolor{gray}{-0.12}\\\\-0.1}  \n",
       "Political hate context     \\makecell{\\textcolor{gray}{-0.13}\\\\-0.17}  \n",
       "Toxicity score               \\makecell{\\textcolor{gray}{0.0}\\\\-0.05}  \n",
       "Afinn score                  \\makecell{\\textcolor{gray}{0.07}\\\\0.11}  \n",
       "Political vector           \\makecell{\\textcolor{gray}{-0.12}\\\\-0.21}  \n",
       "Hate vector                 \\makecell{\\textcolor{gray}{-0.0}\\\\-0.01}  \n",
       "Political interest         \\makecell{\\textcolor{gray}{-0.15}\\\\-0.18}  \n",
       "Political knowledge         \\makecell{\\textcolor{gray}{-0.27}\\\\-0.3}  \n",
       "Hostility                  \\makecell{\\textcolor{gray}{-0.13}\\\\-0.12}  \n",
       "Female                         \\makecell{\\textcolor{gray}{1.0}\\\\1.0}  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#########################\n",
    "### Import libraries ####\n",
    "#########################\n",
    "\n",
    "\n",
    "from scripts_for_table_4 import *\n",
    "\n",
    "\n",
    "#########################\n",
    "### Helper functions ####\n",
    "#########################\n",
    "\n",
    "def remove_outliers(df, skip_columns, verbose=False):\n",
    "    \"\"\"Replace outlying values with NaN, column by column, modifying ``df`` in place.\n",
    "\n",
    "    A value counts as an outlier when its absolute value exceeds\n",
    "    ``abs(median + 1.5 * spread)``. ``spread`` is the column median, or the\n",
    "    column standard deviation when the median is zero (with a zero median the\n",
    "    threshold would otherwise be zero and every non-zero value would be dropped).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame to clean; the processed columns are overwritten.\n",
    "    skip_columns : collection of str\n",
    "        Names of the columns that are left untouched.\n",
    "    verbose : bool, default False\n",
    "        If True, print the name of each column that falls back to the\n",
    "        standard deviation.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The same ``df`` object that was passed in.\n",
    "    \"\"\"\n",
    "    for column in df.columns:\n",
    "        if column in skip_columns:\n",
    "            continue\n",
    "        values = df[column]\n",
    "        median = values.median()\n",
    "        median_is_zero = median * 1.5 == 0\n",
    "        if median_is_zero and verbose:\n",
    "            print(column)\n",
    "        spread = values.std() if median_is_zero else median\n",
    "        threshold = np.abs(spread * 1.5 + median)\n",
    "        df[column] = np.where(np.abs(values) > threshold, np.nan, values)\n",
    "    return df\n",
    "\n",
    "\n",
    "def user_level_correlations(df):\n",
    "    \"\"\"Average every column per ``user.id`` and return the Spearman correlations rounded to 2 decimals.\"\"\"\n",
    "    return df.groupby(\"user.id\").mean().corr(\"spearman\").round(2)\n",
    "\n",
    "\n",
    "def format_correlation(value):\n",
    "    \"\"\"Return ``value`` as text; a missing value becomes a single space.\"\"\"\n",
    "    return \" \" if pd.isna(value) else str(value)\n",
    "\n",
    "\n",
    "def latex_cell(sample_value, full_value):\n",
    "    \"\"\"Build a two-line LaTeX cell: ``sample_value`` in gray on top, ``full_value`` below.\n",
    "\n",
    "    Raw strings are required here: in a normal string literal the leading\n",
    "    backslash-t of the color command would be read as a TAB character.\n",
    "    \"\"\"\n",
    "    return (r\"\\makecell{\\textcolor{gray}{\" + format_correlation(sample_value) + \"}\"\n",
    "            + r\"\\\\\" + format_correlation(full_value) + \"}\")\n",
    "\n",
    "\n",
    "#####################################################\n",
    "### Create correlation matrix for full dataframe ####\n",
    "#####################################################\n",
    "\n",
    "#set up names (row and column order of the annotation correlation matrix)\n",
    "names_df = [\"Political hate expert\",\n",
    "            \"Political hate\",\n",
    "            \"Political hate classifier\",\n",
    "            \"Political hate context\",\n",
    "            \"Toxicity score\",\n",
    "            \"Afinn score\",\n",
    "            \"Political vector\",\n",
    "            \"Hate vector\",\n",
    "            \"Political interest\",\n",
    "            \"Political knowledge\",\n",
    "            \"Hostility\",\n",
    "            \"Female\"]\n",
    "\n",
    "#the first four variables (the political hate measures) appear as rows only in the exported table\n",
    "n_row_only = 4\n",
    "\n",
    "#read data\n",
    "full_df_scores = pd.read_parquet(\"full_df_scores.parquet\")\n",
    "\n",
    "#remove outliers (prints the columns whose median is zero)\n",
    "remove_outliers(full_df_scores, [\"female\", \"user.id\", \"predicted_class\", \"binary_pol_hate\"], verbose=True)\n",
    "\n",
    "#create correlation matrix\n",
    "table = user_level_correlations(full_df_scores)\n",
    "table.columns = names_df[1:]\n",
    "table.index = table.columns\n",
    "\n",
    "#\"Political hate expert\" does not exist for the full dataset:\n",
    "#add it as an empty (NaN) row and column so both tables share the same layout\n",
    "table = table.reindex(index=names_df, columns=names_df)\n",
    "\n",
    "\n",
    "#####################################################\n",
    "### Create correlation matrix for annotation df  ####\n",
    "#####################################################\n",
    "\n",
    "#read data\n",
    "annotation_df = pd.read_parquet(\"annotation_sample.parquet\")\n",
    "\n",
    "#remove outliers\n",
    "remove_outliers(annotation_df, [\"female\", \"predicted_class\", \"class\", \"user.id\"])\n",
    "\n",
    "annotation_cor_table = user_level_correlations(annotation_df)\n",
    "\n",
    "#change names to match above\n",
    "annotation_cor_table.columns = names_df\n",
    "annotation_cor_table.index = names_df\n",
    "\n",
    "#combine both correlations in every cell (rows are matched by position)\n",
    "for column in names_df:\n",
    "    annotation_cor_table[column] = [latex_cell(sample_value, full_value)\n",
    "                                    for sample_value, full_value\n",
    "                                    in zip(annotation_cor_table[column], table[column])]\n",
    "\n",
    "#prepare for latex: wrap the headers in makecell and break names at spaces\n",
    "annotation_cor_table.columns = [r\"\\makecell{\" + name.replace(\" \", r\"\\\\\") + \"}\" for name in names_df]\n",
    "\n",
    "#export the full symmetric matrix (no triangular mask is applied);\n",
    "#the column format is sized to the columns that are actually written\n",
    "latex_table = annotation_cor_table.iloc[:, n_row_only:]\n",
    "latex_table.to_latex(\"cor_table_facebook_both.tex\", escape=False, column_format=\"l\" + \"c\"*latex_table.shape[1])\n",
    "latex_table\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dask2",
   "language": "python",
   "name": "dask2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
